The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorboard
from datetime import datetime
from sklearn.preprocessing import Normalizer, MinMaxScaler, minmax_scale
from sklearn.pipeline import Pipeline
INFO:tensorflow:Enabling eager execution INFO:tensorflow:Enabling v2 tensorshape INFO:tensorflow:Enabling resource variables INFO:tensorflow:Enabling tensor equality INFO:tensorflow:Enabling control flow v2
from tensorflow.keras.datasets import cifar10
(X_train, _), (X_test, labels_test) = cifar10.load_data()
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.
labels_name = {0: 'airplane',
1: 'automobile',
2: 'bird',
3: 'cat',
4: 'deer',
5: 'dog',
6: 'frog',
7: 'horse',
8: 'ship',
9: 'truck'}
X_train.shape
(50000, 32, 32, 3)
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as kb
from tensorflow.keras import regularizers
encoder
input_img = Input(shape=(32,32,3))
regularizer = regularizers.l1(10e-5)
# model: encoder
encoded = Conv2D(64, (3, 3), activation='relu',
padding='same')(input_img)
#encoded = Conv2D(64, (3, 3), activation='relu',
# padding='same')(encoded)
encoded = MaxPooling2D((2, 2), padding='same')(encoded)
encoded = Conv2D(32, (3, 3), activation='relu',
padding='same')(encoded)
#encoded = Conv2D(32, (3, 3), activation='relu',
# padding='same')(encoded)
encoded = MaxPooling2D((2, 2), padding='same')(encoded)
encoded = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
input_encoded_before_flatten = kb.int_shape(encoded)
encoded = Flatten()(encoded)
encoder = Model(input_img, encoded)
input_encoded = Input(shape=kb.int_shape(encoded)[1:])
encoder.summary()
Model: "model" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 32, 32, 3)] 0 _________________________________________________________________ conv2d (Conv2D) (None, 32, 32, 64) 1792 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 16, 16, 64) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 16, 16, 32) 18464 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 8, 8, 32) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 8, 8, 8) 2312 _________________________________________________________________ flatten (Flatten) (None, 512) 0 ================================================================= Total params: 22,568 Trainable params: 22,568 Non-trainable params: 0 _________________________________________________________________
decoder
# model: decoder
decoded = Reshape(input_encoded_before_flatten[1:])(input_encoded)
decoded = Conv2D(32, (3, 3), activation='relu', padding='same')(decoded)
decoded = UpSampling2D((2, 2))(decoded)
#decoded = Conv2D(32, (3, 3), activation='relu', padding='same')(decoded)
decoded = Conv2D(64, (3, 3), activation='relu', padding='same')(decoded)
decoded = UpSampling2D((2, 2))(decoded)
#decoded = Conv2D(64, (3, 3), activation='relu', padding='same')(decoded)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(decoded)
decoder = Model(input_encoded, decoded)
decoder.summary()
Model: "model_1" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_2 (InputLayer) [(None, 512)] 0 _________________________________________________________________ reshape (Reshape) (None, 8, 8, 8) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 8, 8, 32) 2336 _________________________________________________________________ up_sampling2d (UpSampling2D) (None, 16, 16, 32) 0 _________________________________________________________________ conv2d_4 (Conv2D) (None, 16, 16, 64) 18496 _________________________________________________________________ up_sampling2d_1 (UpSampling2 (None, 32, 32, 64) 0 _________________________________________________________________ conv2d_5 (Conv2D) (None, 32, 32, 3) 1731 ================================================================= Total params: 22,563 Trainable params: 22,563 Non-trainable params: 0 _________________________________________________________________
autoencoder
# model: autoencoder
encoded = encoder(input_img)
decoded = decoder(encoded)
autoencoder = Model(input_img, decoded)
optimizer = Adam(learning_rate=0.002)
bce = tf.keras.losses.BinaryCrossentropy();
mse = tf.keras.losses.MeanSquaredError();
autoencoder.compile(optimizer=optimizer, loss=bce)
autoencoder.summary()
Model: "model_4" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_1 (InputLayer) [(None, 32, 32, 3)] 0 _________________________________________________________________ model (Functional) (None, 512) 22568 _________________________________________________________________ model_1 (Functional) (None, 32, 32, 3) 22563 ================================================================= Total params: 45,131 Trainable params: 45,131 Non-trainable params: 0 _________________________________________________________________
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3, min_delta=0.0005)
ckpt_filepath = 'model/cifar10_ae/checkpoints/cifar10_ae-ckpt'
checkpoint = tf.keras.callbacks.ModelCheckpoint(
filepath=ckpt_filepath,
save_weights_only=True,
monitor='val_loss',
save_best_only=True)
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
autoencoder.fit(X_train, X_train,
epochs=25,
batch_size=128,
validation_data=(X_test, X_test),
callbacks=[early_stopping,
checkpoint,
tensorboard_callback], # stop the training when no improvements are made
verbose=1)
Epoch 1/25 391/391 [==============================] - 9s 21ms/step - loss: 0.5699 - val_loss: 0.5651 Epoch 2/25 391/391 [==============================] - 7s 17ms/step - loss: 0.5644 - val_loss: 0.5653 Epoch 3/25 391/391 [==============================] - 7s 17ms/step - loss: 0.5634 - val_loss: 0.5649 Epoch 4/25 391/391 [==============================] - 7s 17ms/step - loss: 0.5640 - val_loss: 0.5646 Epoch 5/25 391/391 [==============================] - 7s 18ms/step - loss: 0.5643 - val_loss: 0.5646
<tensorflow.python.keras.callbacks.History at 0x24bb549c280>
# save model
save_filepath = 'model/cifar10_ae/last'
autoencoder.save(
filepath=save_filepath,
overwrite=True,
include_optimizer=True,
)
INFO:tensorflow:Assets written to: model/cifar10_ae/last\assets
# load model
save_filepath = 'model/cifar10_ae/last'
loaded_model = tf.keras.models.load_model(save_filepath)
autoencoder = loaded_model
input_img = autoencoder.input
encoded = autoencoder.layers[1].output
decoded = autoencoder.layers[2].output
encoder = Model(input_img, encoded)
decoder = Model(autoencoder.layers[2].input, decoded)
encoded_imgs = encoder.predict(X_test)
decoded_imgs = decoder.predict(encoded_imgs)
import matplotlib.pyplot as plt
done now
decoded_imgs_classified=np.zeros((10,1000,32,32,3))
original_imgs_classified=np.zeros((10,1000,32,32,3))
for i in range(10): # it should be better this way if each category has the same number of items
decoded_imgs_classified[i,:,:,:,:] = decoded_imgs[labels_test.reshape(-1,)==i].reshape(-1,32,32,3)
original_imgs_classified[i,:,:,:,:] = X_test[labels_test.reshape(-1,)==i].reshape(-1,32,32,3)
n = 10 # how many images we will display
plt.figure(figsize=(20, 4))
for i in range(n):
# display original
ax = plt.subplot(2, n, i + 1)
plt.imshow(original_imgs_classified[i,0])
plt.gray()
ax.set_xticks([])
ax.set_yticks([])
# display reconstruction
ax = plt.subplot(2, n, i + 1 + n)
plt.imshow(decoded_imgs_classified[i,0])
plt.gray()
ax.set_xticks([])
ax.set_yticks([])
ax.set_xlabel(labels_name[i])
plt.savefig('images/cifar10_ae/cifar10_reconstructed_all.png', dpi=120)
plt.show()
n = 10 # how many images we will display
fig = plt.figure(figsize=(20, 4))
selected_class = 0 # the class to be shown
for i in range(n):
# display original
ax = fig.add_subplot(2, n, i + 1)
plt.imshow(original_imgs_classified[selected_class,i])
plt.gray()
ax.set_xticks([])
ax.set_yticks([])
# display reconstruction
ax = fig.add_subplot(2, n, i + 1 + n)
plt.imshow(decoded_imgs_classified[selected_class,i])
plt.gray()
ax.set_xticks([])
ax.set_yticks([])
fig.suptitle(labels_name[selected_class])
plt.savefig('images/cifar10_ae/cifar10_reconstructed_'+str(selected_class)+'.png', dpi=120)
plt.show()
bce = tf.keras.losses.BinaryCrossentropy();
mse = tf.keras.losses.MeanSquaredError();
loss_test = np.zeros(10)
input_entropy = np.zeros(10)
for i in range(10):
loss_test[i] = bce(original_imgs_classified[i].reshape(-1,32*32*3),
decoded_imgs_classified[i].reshape(-1,32*32*3)).numpy()
input_entropy[i] = bce(original_imgs_classified[i].reshape(-1,32*32*3),
original_imgs_classified[i].reshape(-1,32*32*3)).numpy()
fig = plt.figure(figsize=(10,7.5))
ax = fig.add_subplot(111)
bce_bar = ax.bar(range(10),loss_test)
e_line, = ax.plot(range(10),input_entropy, 'r+-')
ax.set_xticks(range(10))
ax.set_xticklabels(labels_name.values())
#ax.set_ylabel(ylabel='Binary Cross Entropy', fontsize=15)
ax.set_ylim((0,.8))
bce_bar.set_label('Binary Cross Entropy')
e_line.set_label('Binary Entropy of Input')
ax.legend(loc='upper right',
fontsize='large')
ax.tick_params(labelsize=15)
ax.set_title('CIFAR10', fontsize=18)
fig.autofmt_xdate()
plt.savefig('images/cifar10_ae/cifar10_ae_bce.png',dpi=120)
plt.show()
mse = tf.keras.losses.MeanSquaredError();
rmse_test = np.zeros(10)
var_test = np.zeros(10)
for i in range(10):
rmse_test[i] = 255*255*mse(original_imgs_classified[i].reshape(-1,32*32*3),
decoded_imgs_classified[i].reshape(-1,32*32*3)).numpy()
rmse_test[i] = np.sqrt(rmse_test[i])
var_test[i] = 255*255*np.var(original_imgs_classified[i].reshape(-1,32*32*3))
var_test[i] = np.sqrt(var_test[i])
fig = plt.figure(figsize=(10,7.5))
ax = fig.add_subplot(111)
ax.bar(range(10),rmse_test)
ax.plot(range(10),var_test)
ax.set_xticks(range(10))
ax.set_xticklabels(labels_name.values())
ax.set_ylabel(ylabel='RMSE/STD', fontsize=15)
ax.tick_params(labelsize=15)
fig.autofmt_xdate()
plt.savefig('images/cifar10_ae/cifar10_ae_rmse.png',dpi=120)
plt.show()
from sklearn.manifold import TSNE
latent_manifold = TSNE(n_components=2, random_state=33)
latent_points = latent_manifold.fit_transform(encoded_imgs)
plt.figure(figsize=(10, 5))
plt.scatter(latent_points[:, 0],
latent_points[:, 1],
c=labels_test,
cmap='Paired',
label="t-SNE")
plt.savefig('images/cifar10_ae/cifar10_tsne.png', dpi=120)
plt.show()
import umap.umap_ as umap
import umap.plot
umap
<module 'umap' from 'C:\\Users\\nanli\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\umap\\__init__.py'>
mapper = umap.UMAP().fit(encoded_imgs)
umap.plot.points(mapper, labels=labels_test.reshape(-1,), color_key_cmap='Paired', background='black')
<AxesSubplot:>
plt.figure(figsize=(10, 5))
plt.scatter(mapper.embedding_[:, 0],
mapper.embedding_[:, 1],
c=labels_test,
cmap='Paired',
label="t-SNE")
plt.savefig('images/cifar10_ae/cifar10_umap.png', dpi=120)
plt.show()
%load_ext tensorboard
The tensorboard extension is already loaded. To reload it, use: %reload_ext tensorboard
ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 944.
%tensorboard --logdir logs
Reusing TensorBoard on port 6006 (pid 1480), started 0:01:59 ago. (Use '!kill 1480' to kill it.)